
* +++++++++++++++++++++
* MERGE REGIONAL DATA
* FOR ANALYSIS DATASET
* +++++++++++++++++++++

* construct crosswalk from Kreis to Bundesland
* +++++++++++++++

* construct crosswalk from Kreis to Bundesland so that we can control for state fixed effects
* simply load attribute table from shapefile of all Bundeslaender
* and combine it with attribute table from shapefile of all Kreise
* source: German Federal Agency for Cartography and Geodesy, data product: VG250
import delimited "${data_raw}/geo/lan_shapefile_info.csv", clear
bys ags (gen) : keep if _n==1 

keep sn_l gen
ren gen state
replace state = "Baden-Württemberg" if state == "Baden-WÃ¼rttemberg"
replace state = "Thüringen" if state == "ThÃ¼ringen"
tempfile land
save `land'


import delimited "${data_raw}/geo/krs_shapefile_info.csv", clear
bys ags (gen) : keep if _n==1
keep ags gen sn_l 
merge m:1 sn_l using `land', assert(3) nogen
tempfile cw 
save `cw'

* combine integration data with other data
* +++++++++++++++

* fb integration data
import delimited "${data_raw}/fb_integration/regional_measures_final.csv", clear

* keep main vars only
keep nuts3 sample sy_n native_n measure sy_avg_resid native_avg_resid
drop if inlist(measure, "n_frnd_nat_lcl_15_16_qrtr", "n_frnd_nat_lcl_17_18_qrtr", ///
	"n_frnd_nat_lcl_19_20_qrtr", "n_frnd_nat_lcl_qrtr")
replace measure = regexr(measure, "recent", "rec")
isid nuts3 measure sample

* reshape wide on measure of integration
ren *_resid* *_re*
ds sy_* native_*
local vars `r(varlist)'
renvars sy_* native_*, suff(_)
qui: levelsof measure, local(measures)
reshape wide sy_* native_*, i(nuts3 sample) j(measure) string
foreach var in `vars' {
	foreach m in `measures' {
		ren `var'_`m' `m'_`var'
		destring `m'_`var', replace force
		* drop variables that are just missing
		qui: count if `m'_`var' !=. 
		if `r(N)' == 0	{
			drop `m'_`var'
		}
	}
}	

* construct measure of friending bias (only native friends)
foreach suff in avg_re {
	gen n_frnd_nat_lcl_sy_nat_`suff' = n_frnd_nat_lcl_sy_`suff' / ///
		n_frnd_nat_lcl_native_`suff'
}

* reshape wide on sample
replace sample ="a" if sample =="all"
replace sample ="f" if sample =="isfemale_1"
replace sample ="m" if sample =="isfemale_0"
replace sample ="a1" if sample =="age_18_24"
replace sample ="a2" if sample =="age_25_34"
replace sample ="a3" if sample =="age_35_44"
replace sample ="a4" if sample =="age_45_54"
replace sample ="a5" if sample =="age_55"

reshape wide in_admin_proasyl_native_n - n_frnd_nat_lcl_sy_nat_avg_re, ///
	i(nuts3) j(sample) string
ren *a *
	
tempfile integration
save `integration'

* crosswalk NUTS3 codes to Gemeindeschluessel 
import delimited "${data_raw}/nuts_xw/nuts3_DE_xw.csv", clear

merge 1:1 nuts3 using `integration', assert(3) keep(3) nogen
ren id ags 

* add states
merge 1:1 ags using `cw', keep(3) assert(2 3) nogen
 
* merge apprenticeship data 
preserve 
	use "${data_derived}/positions_and_applicants_per_year_and_kreis.dta", clear
	gen pre_post = "pre" if inrange(year, 2010, 2015)
	replace pre_post = "post" if year > 2015
	collapse (mean) *applicants *open_positions *positions_per_applicant, by(ags pre_post)
	ren (*open_positions *positions_per_applicant) (*open_pos *pos_per_app)
	renvars *applicants *open_pos *pos_per_app, suff(_)
	reshape wide *applicants* *open_pos* *pos_per_app*, i(ags) j(pre_post) string
	tempfile apprenticeship_data
	save `apprenticeship_data'
restore 
merge 1:1 ags using `apprenticeship_data', keep(3) nogen

* merge labor market data
preserve
	use "${data_derived}/labor_market_stats_2015_to_2020.dta", clear
	merge m:1 ags year using "${data_derived}/kreis_total_pop_by_age_gender.dta", ///
	keep(3) keepusing(pop_syr* pop_tot) nogen
	foreach i in syria tot {
		gen num_emp_train_any_`i' = num_employed_`i' + /// 
			num_employed_min_`i' + num_in_train_`i'
	}
	foreach var in emp_train_any_syria employed_syria employed_min_syria in_train_syria {
		gen frac_`var' = num_`var'/pop_syr_tot
	}
	foreach var in emp_train_any_tot employed_tot employed_min_tot in_train_tot {
		gen frac_`var' = num_`var'/pop_tot
	}
	keep if year ==2019
	collapse *emp_train_any_* *employed_* *in_train_* ///
	,by(ags)
	tempfile lm 
	save `lm'
restore
merge 1:1 ags using `lm', keep(3) assert(2 3) nogen

* merge integration sports clubs data
preserve 
	use "${data_derived}/integration_durch_sport_clubs.dta", clear
	gen clubs = 1 
	collapse (rawsum) clubs, by(ags)
	merge 1:m ags using "${data_derived}/kreis_total_pop_by_age_gender.dta", ///
		keep(2 3) keepusing(pop_syr* pop_tot year) nogen
	replace clubs = 0 if clubs ==.
	gen clubs_per_syr = clubs / pop_syr_tot
	gen log_clubs_per_syr = log(clubs_per_syr)
	keep if year ==2019
	keep ags *clubs*
	tempfile clubs
	save `clubs'
restore	
merge 1:1 ags using `clubs', keep(3) assert(2 3) nogen

* integration course data
preserve
	use "${data_derived}/integration_by_nuts3_2015_to_2019.dta", clear
	keep ags *_15_19
	merge 1:m ags using "${data_derived}/kreis_total_pop_by_age_gender.dta", ///
		nogen assert(1 2 3) keep(3) keepusing(pop_syr_tot year)
	keep if year == 2019
	foreach var in courses_compl_15_19 courses_start_15_19 ///
		new_eligible_15_19 new_partic_15_19 finished_15_19 {
			gen per_syr_`var' = num_`var' / pop_syr_tot
			gen log_per_syr_`var' = log((1+num_`var') / pop_syr_tot)
		}	
	keep ags *per_syr* num_courses_compl_15_19 num_courses_start_15_19 ///
		num_new_eligible_15_19 num_new_partic_15_19 num_finished_15_19
	ren *courses* *cours*
	tempfile courses
	save `courses'
restore	
merge 1:1 ags using `courses', keep(3) assert(2 3) nogen	
	
* ProAsyl groups
preserve
	use "${data_derived}/list_of_proasyl_groups_clean.dta", clear
	gen num_proasyl_group = 1
	collapse (rawsum) num_proasyl_group, by(ags)
	merge 1:m ags using "${data_derived}/kreis_total_pop_by_age_gender.dta", ///
		nogen assert(2 3) keep(2 3) keepusing(pop_syr_tot pop_tot year)
	keep if year ==2018 // given that ProAsyl group list is a bit outdated
	replace num_proasyl_group = 0 if num_proasyl_group ==.
	gen per_syr_proasyl_groups = num_proasyl_group / pop_syr_tot
	gen per_pop_proasyl_groups = num_proasyl_group / pop_tot
	keep ags per_* 
	foreach var of varlist per_* {
		gen log_`var' = log(`var')
	}
	tempfile proasyl
	save `proasyl'
restore	
merge 1:1 ags using `proasyl', keep(3) assert(2 3) nogen

* crime data
merge 1:1 ags using /// 
	"${data_derived}/suspects_by_nat_by_kreis.dta", ///
	keep(3) assert(2 3) nogen keepusing(*per_pop* clearance* pct_chg*)

* election results European election 2019 and 2014 
merge 1:1 ags using "${data_derived}/european_election_results.dta", ///
	keep(3) assert(2 3) nogen
foreach var of varlist afd_201? frac_voted_201? {
	gen log_`var' = log(`var')
	* demean AfD vote share by state
	areg `var' [w=n_frnd_nat_lcl_native_n], absorb(state)
	predict `var'_dev_st, r
}
	
* local economy
merge 1:1 ags using "${data_derived}/econ_vars.dta", ///
	keep(3) assert(2 3) nogen

* population density
merge 1:1 ags using "${data_derived}//urban_rural_structure.dta", ///
	keep(3) assert(2 3) nogen
gen log_pop_dens_2018 = log(pop_dens_2018)
	
* foreign population
preserve
	use "${data_derived}/kreis_total_pop_by_age_gender.dta", clear
	keep if inlist(year, 2010, 2019)
	keep ags year pop_tot pop_for_tot pop_syr_tot ///
		frac_for_tot frac_syr_tot log_frac_for_tot log_frac_syr_tot
	renvars *_tot, suff(_)
	reshape wide *_tot*, i(ags) j(year)
	tempfile foreign_pop
	save `foreign_pop'
restore
merge 1:1 ags using `foreign_pop', ///
	keep(3) assert(2 3) nogen

* average age and share female in 2014, i.e. before arrival of immigrants
preserve
	use "${data_derived}/kreis_total_pop_by_age_gender.dta", clear
	keep if year ==2014 
	keep ags avg_age share_female
	tempfile age_gender
	save `age_gender'
restore
merge 1:1 ags using `age_gender', keep(3) assert(2 3) nogen	
	
* religion 
merge 1:1 ags using "${data_derived}/religion_census_2011.dta", nogen assert(2 3)	
gen log_frac_christian=log(frac_christian)
	
* teacher unemployment
merge 1:1 ags using "${data_derived}/unemployed_and_job_seeking_teachers2011_20.dta", nogen assert(2 3)
gen unemp_uni_gen_schl2014_per_pop = unemp_uni2014_per_pop + unemp_gen_schl2014_per_pop
gen unemp_uni_gen_oth2014_per_pop = unemp_uni2014_per_pop + unemp_gen_schl2014_per_pop ///
	+ unemp_teach_oth2014_per_pop
gen log_unemp_tot2014_per_pop = log(unemp_tot2014_per_pop) 
 
* label variables so that graphs can be labeled easily
foreach suff in avg_re {
	foreach group in sy native {
		if "`group'" == "sy" 			local label "Among Syrians"
		if "`group'" == "native" 		local label "Among Natives"
		cap label var n_frnd_eu_lcl_`group'_`suff' "Num Friends in Europe `label'"
		cap label var n_frnd_nat_lcl_`group'_`suff' "Num Friends Native `label'"
		cap label var n_frnd_sy_lcl_`group'_`suff' "Num Friends Syrian `label'"
		cap label var n_frnd_t5_lcl_`group'_`suff' "Num Friends Top-Refugee Countries `label'"
		cap label var produ_any_de_`group'_`suff' "% Produced any Content in German `label'"
	}
	cap label var n_frnd_nat_lcl_sy_nat_`suff' "Friending Bias"
}

label var pos_per_app_pre "Positions (Lehrstellen) per Applicant 2010-2014"
label var pos_per_app_post "Positions (Lehrstellen) per Applicant 2015-2019"
label var log_pos_per_app_pre "Log Positions (Lehrstellen) per Applicant 2010-2014"
label var log_pos_per_app_post "Log Positions (Lehrstellen) per Applicant 2015-2019"

label var frac_emp_train_any_syria "Fraction of Syrians Employed or in Training"
label var frac_employed_syria "Fraction of Syrians Employed w/ Soc. Sec."
label var frac_employed_min_syria "Fraction of Syrians Employed w/o Soc. Sec."
label var frac_in_train_syria "Fraction of Syrians In Training"

label var clubs "Number of Sports Clubs"
label var clubs_per_syr "Number of Sports Clubs per Syrian"
label var log_clubs_per_syr "Log Number of Sports Clubs per Syrian"

label var per_pop_proasyl_groups "Number of ProAsyl Groups per Pop"
label var per_syr_proasyl_groups "Number of ProAsyl Groups per Syrian"
label var log_per_pop_proasyl_groups "Log Number of ProAsyl Groups per Pop"
label var log_per_syr_proasyl_groups "Log Number of ProAsyl Groups per Syrian"

label var per_syr_cours_compl_15_19 "#Integration Courses Completed 2015-2019 per Syrian"
label var per_syr_cours_start_15_19 "#Integration Courses Started 2015-2019 per Syrian"
label var per_syr_new_partic_15_19 "#Participants Integration Courses 2015-2019 per Syrian"
label var per_syr_finished_15_19 "#People Finishing Integration Courses 2015-2019 per Syrian"
label var log_per_syr_cours_compl_15_19 "Log #Integration Courses Completed 2015-2019 per Syrian"
label var log_per_syr_cours_start_15_19 "Log #Integration Courses Started 2015-2019 per Syrian"
label var log_per_syr_new_partic_15_19 "Log #Participants Integration Courses 2015-2019 per Syrian"
label var log_per_syr_finished_15_19 "Log #People Finishing Integration Courses 2015-2019 per Syrian"

foreach year in 2014 2019 { 
	label var susp_per_pop_for_vio_cr_`year' ///
		"Number of Foreign Suspects per Foreign Pop. Violent Crime `year'"
	label var susp_per_pop_de_vio_cr_`year' ///
		"Number of German Suspects per German Pop. Violent Crime `year'"
	label var cases_per_pop_`year' ///
		"Number of Reported Crimes per Pop. `year'"
	label var log_cases_per_pop_`year' ///
		"Log Number of Reported Crimes per Pop. `year'"	
	label var cases_per_pop_vio_cr_`year' ///
		"Number of Reported Violent Crimes per Pop. `year'"
	label var log_cases_per_pop_vio_cr_`year' ///
		"Log Number of Reported Violent Crimes per Pop. `year'"
	label var cases_per_pop_theft_`year' ///
		"Number of Reported Thefts per Pop. `year'"
	label var log_cases_per_pop_theft_`year' ///
		"Log Number of Reported Thefts per Pop. `year'"	
}
foreach type in vio_cr theft {
	if "`type'" =="vio_cr" 		local type_label "Violent Crime"
	if "`type'" =="theft" 		local type_label "Theft"
	label var pct_chg_susp_per_pop_de_`type' ///
		"% Change German Suspects `type_label' 2014-19"
	label var pct_chg_susp_per_pop_for_`type' ///
		"% Change Foreign Suspects `type_label' 2014-19"	
	label var pct_chg_cases_per_pop_`type' ///
		"% Change Reported `type_label' Crimes 2014-19"
}
label var pct_chg_susp_per_pop_de "% Change German Suspects 2014-19"
label var pct_chg_susp_per_pop_for "% Change Foreign Suspects 2014-19"	
label var pct_chg_cases_per_pop "% Change Reported Crimes 2014-19"	
	
foreach year in 2019 2014 {
	label var afd_`year' "Vote Share AFD European Elections `year'"
	label var afd_npd_`year' "Vote Share AFD and NPD European Elections `year'"
	label var log_afd_`year' "Log Vote Share AFD European Elections `year'"
	}

label var avg_hrly_wage "Average Hourly Wage Rate (in EUR)"
label var avg_inc "Average Income (in EUR)"
label var gdp_pc "GDP per capita (in EUR)"
label var log_avg_hrly_wage "Log Average Hourly Wage Rate (in EUR)"
label var log_avg_inc "Log Average Income (in EUR)"
label var log_gdp_pc "Log GDP per capita (in EUR)"

label var pop_dens_2018 "Population Density 2018"
label var log_pop_dens_2018 "Log Pop. Density 2018"

foreach year in 2019 2010 {
	label var frac_for_tot_`year' "Fraction of Foreigners `year'" 
	label var frac_syr_tot_`year' "Fraction of Syrians `year'" 
	label var log_frac_for_tot_`year' "Log Fraction of Foreigners `year'" 
	label var log_frac_syr_tot_`year' "Log Fraction of Syrians `year'" 
}

label var pct_empty_flats "% Empty Flats"
label var frac_christian "Fraction of Christians"
label var log_frac_christian "Log Fraction of Christians"

label var unemp_tot2014_per_pop "Number of People Unemployed per Pop."
label var unemp_uni_gen_schl2014_per_pop "Unemp. Gen. Schls + HS Teachers per Pop."
label var unemp_gen_schl2014_per_pop "Unemp. Gen. Schls Teachers per Pop."
label var unemp_uni2014_per_pop "Unemp. HS Teachers per Pop."
label var unemp_vocat2014_per_pop "Unemp. Vocational Schl Teachers per Pop."
label var unemp_uni_gen_oth2014_per_pop "Unemp. Gen. Schls + HS + Oth. Teachers per Pop."

label var avg_age "Average Age"
label var share_female "% Female"

* save resulting dataset
isid ags
save "${data_derived}/regional_analysis_data.dta", replace
